/*******************************************************************************
																				
	DESCRIPTION: 	This do file creates the dataset with variables indicating 
					that an individual bought additional unemployment insurance 
					and was a member of a union.		
	
*******************************************************************************/

* Start from a clean session and set the prefix that is used in the names of
* all intermediate files saved by this do-file.
clear all
global id_code 001_8

********************************************************************************
* A1: AKAS data - combine all years 
********************************************************************************

* 2002 is the base file. The year variable is created here (generate would
* stop with an error if the file already contained one).
use "${AKAS}/AKAS_2002.dta", clear
generate year = 2002

* Stack the 2003-2006 files underneath the base file. Rows whose year is
* still missing after an append are stamped with the year of the file that
* was just added (presumably the yearly files carry no year variable of
* their own - confirm against the raw data).
forvalues yr = 2003/2006 {
	append using "${AKAS}/AKAS_`yr'.dta"
	replace year = `yr' if year == .
}

* Remove rows that are identical on every variable
duplicates drop // 465 observations deleted

********************************************************************************
* A2: AKAS data - cleaning variables (unemployment insurance)
********************************************************************************

* Generate a dummy indicating any additional UI.
* The dummy is 1 when the recorded amount is strictly positive and 0 otherwise
* (including when the amount is missing).
* missing() is used rather than a comparison with "." because extended
* missing values (.a to .z) are larger than any number and are not equal to
* ".", so a plain "!=." test would code them as 1.
rename UKUAKAS amountAddlUI
gen additionalUI = (amountAddlUI > 0 & !missing(amountAddlUI))

* Save a slim file holding only the identifiers, the year and the dummy.
* preserve/restore brings the full AKAS data back into memory afterwards so
* that the union-membership section below can still use it.
* NOTE: only fully identical rows are removed here, so a person-year can still
* appear twice in the saved file if its rows disagree on the dummy.
preserve
keep Lop* year additionalUI 
duplicates drop // 14 observations deleted
save "${data_intermediate}/${id_code}_UI_clean.dta", replace
restore

********************************************************************************
* A3: AKAS data - cleaning variables (union membership)
********************************************************************************

* Generate a dummy indicating membership in a union.
* The dummy is 1 when the recorded union contribution is strictly positive and
* 0 otherwise (including when the amount is missing).
* missing() is used rather than a comparison with "." because extended
* missing values (.a to .z) are larger than any number and are not equal to
* ".", so a plain "!=." test would code them as 1.
rename UKUFAC amountUnionContr
gen unionMember = (amountUnionContr > 0 & !missing(amountUnionContr))

keep Lop* year unionMember
duplicates drop // 57 observations dropped
duplicates report Lop* year // surplus of 216

* The remaining duplicates are person-years with two contradicting dummies.
* Keep the one that indicates membership by taking the maximum within
* person-year and then dropping the (now identical) copies.
bysort Lop* year: egen unionMemberMax = max(unionMember)
drop unionMember
rename unionMemberMax unionMember
duplicates drop // 216 observations deleted
duplicates report Lop* year // no surplus

* Generate a year for each individual: build a skeleton with one row per
* person in the unemployment spells data and per year covered by the AKAS
* files combined above (2002-2006).
local firstYear 2002
local lastYear 2006
local nYears = `lastYear' - `firstYear' + 1

preserve
use "${data}/001_1_UnemploymentSpells.dta", clear
keep Lop*
duplicates drop
expand `nYears'
bysort Lop*: gen year = `firstYear' - 1 + _n
tempfile temp
save `temp'
restore

* Keep matched rows and skeleton-only rows; AKAS rows for people who are not in
* the unemployment spells data are discarded.
merge 1:1 Lop* year using `temp', keep(match using) nogenerate

* Person-years that come only from the skeleton have no AKAS record and are
* treated as non-members.
replace unionMember = 0 if missing(unionMember)

* Generate a lag variable (missing in each person's first year).
* NOTE(review): xtset accepts a single panel variable, so this relies on Lop*
* matching exactly one variable at this point - confirm.
xtset Lop* year
gen L_unionMember = L.unionMember
save "${data_intermediate}/${id_code}_unionMember.dta", replace


********************************************************************************
* B1: UI_fund data - clean variables
********************************************************************************

* Load the UI fund data and remove rows that are identical on every variable
use "${UI_fund}/UI_fund.dta", clear
duplicates drop // 288 observations deleted
duplicates report Lop* // surplus of 150
keep Lop* bkassa*

* bkassa05-bkassa09 are the yearly dummies indicating whether the person
* bought insurance in that year. For every person take the highest value
* across their rows, so that all rows of one person agree.
forvalues yy = 5/9 {
	bysort Lop*: egen additionalUI200`yy' = max(bkassa0`yy')
}

keep Lop* additionalUI*

* Delete duplicates for the same person -> the rows of one person should be
* identical now, so this leaves one row per person
duplicates drop // 150 observations deleted 
duplicates report LopNr_PersonNr // no duplicates

* Reshape from one row per person (additionalUI2005-additionalUI2009) to one
* row per person and year; the four-digit suffix becomes the year variable
reshape long additionalUI, i(LopNr_PersonNr) j(year)

save "${data_intermediate}/${id_code}_UI_clean2005-2009.dta", replace

********************************************************************************
* C1: Combine UI_fund and AKAS data
********************************************************************************

* Stack the two person-year sources: UI_fund (2005-2009) first, then the
* AKAS-based file saved in section A2
use "${data_intermediate}/${id_code}_UI_clean2005-2009.dta", clear
append using "${data_intermediate}/${id_code}_UI_clean.dta"

* Remove rows that are identical on every variable
duplicates drop // 8 073 076 observations deleted

* Check whether there are individuals with the same year and ID but a
* different dummy
duplicates report Lop* year 
* There are 900 K observations for which one dataset records additional UI
* and the other one does not. In such cases the individual is recorded as
* having additional UI, i.e. the maximum of the dummy within person-year
* is used.
bysort Lop* year: egen anyAdditionalUI = max(additionalUI)
drop additionalUI
rename anyAdditionalUI additionalUI

* Delete duplicates for the same person in the same year -> the rows of one
* person-year should be identical now
duplicates drop // 456 450 observations deleted
duplicates report Lop* year // no duplicates

* Generate a year for each individual: one row per person in the unemployment
* spells data for each of the 8 years 2002-2009 (_n runs from 1 to 8)
preserve
use "${data}/001_1_UnemploymentSpells.dta", clear
keep Lop*
duplicates drop
expand 8
bysort Lop*: gen year = _n + 2001
tempfile temp
save `temp'
restore

* Keep matched rows (3) and skeleton-only rows (2); UI rows for people who are
* not in the unemployment spells data (1) are discarded
merge 1:1 Lop* year using `temp'
keep if inlist(_merge, 2, 3)
drop _merge

* Assume that if an individual is not in this dataset then that means they
* didn't buy extra insurance
replace additionalUI = 0 if additionalUI == .

* Generate a lag variable (missing in each person's first year).
* NOTE(review): xtset accepts a single panel variable, so this relies on Lop*
* matching exactly one variable at this point - confirm.
xtset Lop* year
gen L_additionalUI = L1.additionalUI

* Only the identifiers, the year and the lagged dummy are saved
keep Lop* year L_additionalUI

save "${data_intermediate}/${id_code}_UI_final.dta", replace